/************************************************************************************************************ 
												Master File
									Executes cleaning of data & analysis
									- 	Built on Stata 14 on macOS   -
										Benjamin Lerch (23.10.2023)
************************************************************************************************************/

* Session setup for the whole replication run
set more off        // do not pause output waiting for a keypress
clear all           // start from an empty session
set maxvar 32000    // raise the variable limit; kept after -clear all- as in the original order

************************************************************************************************************
*												Global paths											   *
************************************************************************************************************

* Project root: replace with the location of the replication folder on your machine.
* Every other path in this file is defined relative to it.
global project "/Users/giuseppedigiacomo/Documents/Replication"

* Raw data: one sub-folder per source
foreach src in automation imports education industry geo {
	global raw_data_`src' "${project}/raw_data/`src'"
}
global raw_data_lmarket "${project}/raw_data/labor market"
global contributed_data "${project}/raw_data/contributed"

* Temporary (cleaned) data
foreach src in automation imports education {
	global clean_data_`src' "${project}/temp_data/`src'"
}
global clean_data_lmarket "${project}/temp_data/labor market"

* Final data
foreach grp in automation imports outcomes covariates {
	global final_data_`grp' "${project}/final_data/`grp'"
}

* Crosswalks (folder names do not follow the macro suffix, so they are spelled out)
global xwalk_geo  "${project}/xwalks/xwalks_geography"
global xwalk_ind  "${project}/xwalks/xwalks_industry"
global xwalk_occ  "${project}/xwalks/xwalks_occupation"
global xwalk_wage "${project}/xwalks/xwalks_wages"

* Lists
global list "${project}/raw_data/list"


* Create the output folder tree (temp data, final data, results) under the project root.
* Stata's built-in -mkdir- is used instead of shelling out to the operating system,
* so this step does not depend on which shell the machine provides.
* -capture- lets a re-run continue when a folder already exists; note that it also
* silences any other reason a folder could not be created.
* Parents are listed before their children, in the same order as the original sequence.
foreach folder in ///
	"temp_data" ///
	"temp_data/automation" ///
	"temp_data/education" ///
	"temp_data/imports" ///
	"temp_data/labor market" ///
	"final_data" ///
	"final_data/automation" ///
	"final_data/covariates" ///
	"final_data/imports" ///
	"final_data/outcomes" ///
	"results" ///
	"results/appendix" ///
	"results/appendix/figures" ///
	"results/appendix/tables" ///
	"results/main" ///
	"results/main/figures" ///
	"results/main/tables" {
	capture mkdir "$project/`folder'"
}

* Output folders for tables and figures (main text and appendix)
foreach part in main appendix {
	global tables_`part'  "${project}/results/`part'/tables"
	global figures_`part' "${project}/results/`part'/figures"
}

* Do-file folders: cleaners
foreach topic in automation covariates education imports {
	global do_clean_`topic' "${project}/dofiles/cleaners/`topic'"
}
global do_clean_lmarket "${project}/dofiles/cleaners/labor market"

* Do-file folders: builders
foreach topic in automation covariates education {
	global do_build_`topic' "${project}/dofiles/builders/`topic'"
}
global do_build_lmarket "${project}/dofiles/builders/labor market"

* Do-file folders: analysis (main text and appendix)
foreach part in main appendix {
	global do_build_`part' "${project}/dofiles/analysis/`part'"
}

************************************************************************************************************
*											Regression covariates										   *
************************************************************************************************************

* Controls measured in 1990
global demographics_1990    "ipums_female_1990 ipums_black_1990 ipums_hispanic_1990 ipums_low_1990 ipums_logpop_1990 ipums_a25_34_1990 ipums_a35_44_1990 ipums_a45_54_1990"
global industry_shares_1990 "ind_share_manufacturing_1990 ind_share_services_1990 ind_share_research_1990 ind_share_utilities_1990 ind_share_mining_1990 ind_share_construction_1990"
global occupations_1990     "expo_share_occ_routine_1990 expo_share_occ_offshore_1990"
global institutions_1990    "public private uniTop30Dummy largest150 anyAid"

* Controls measured in 1970 (shorter lists than 1990: no hispanic share, different industry groups)
global demographics_1970    "ipums_female_1970 ipums_black_1970 ipums_low_1970 ipums_logpop_1970 ipums_a25_34_1970 ipums_a35_44_1970 ipums_a45_54_1970"
global industry_shares_1970 "ind_share_manufacturing_1970 ind_share_low_usage_1970 ind_share_services_1970"
global occupations_1970     "expo_share_occ_routine_1970 expo_share_occ_offshore_1970"

* Period-level controls (variable names without a year suffix)
global demographics         "ipums_female ipums_black ipums_hispanic ipums_low ipums_logpop ipums_a25_34 ipums_a35_44 ipums_a45_54"
global industry_shares      "ind_share_manufacturing ind_share_services ind_share_research ind_share_utilities ind_share_mining ind_share_construction"
global occupations          "expo_share_occ_routine expo_share_occ_offshore"

* CZ of origin and destination: every 1990 control above, first with suffix _d and
* then with suffix _o (presumably destination and origin -- confirm against the
* CZ-to-CZ covariates builder). Derived from the four 1990 lists so that the names
* and their order match the lists above.
global controls_combi_1990
foreach side in d o {
	foreach ctrl in $demographics_1990 $industry_shares_1990 $occupations_1990 $institutions_1990 {
		global controls_combi_1990 $controls_combi_1990 `ctrl'_`side'
	}
}


************************************************************************************************************
*													Cleaning											   *
************************************************************************************************************

******************************************
*	   Automation and technologies 		 *
******************************************

* Cleaning scripts for automation and technology data, executed in this order.

* KLEMS employment, by IFR industry
run "${do_clean_automation}/clean klems data.do" // ok

* Census employment, by IFR industry
run "${do_clean_automation}/clean employment by IFR industries.do" // ok

* IFR robot data
run "${do_clean_automation}/clean ifr data.do" // ok

* Shift component, by industry
run "${do_clean_automation}/clean shift-component by industry.do" // ok

* Exposure to robots, by period
run "${do_clean_automation}/clean exposure to robots.do" // ok

* IT capital investment, routine tasks, offshorability
run "${do_clean_automation}/clean exposure trends.do" // ok

* IT capital intensity and PC adoption
run "${do_clean_automation}/clean exposure to IT intensity and PC adoption.do" // ok

* Exposure to robots, by industry
run "${do_clean_automation}/clean exposure to robots by industry.do" // ok

* Census employment by IFR industry, state level
run "${do_clean_automation}/clean employment by IFR industries - state.do" // ok

* Exposure to robots, state level
run "${do_clean_automation}/clean exposure to robots - state.do" // ok

******************************************
*	   		  	Imports 				 *
******************************************

* Cleaning scripts for import exposure, executed in this order.

* US imports from China
run "${do_clean_imports}/clean US imports China.do" // ok

* US imports from Europe
run "${do_clean_imports}/clean US imports Europe.do" // ok

* Canadian imports from Europe
run "${do_clean_imports}/clean CAN imports Europe.do" // ok

* US imports from China, state level
run "${do_clean_imports}/clean US imports China - state.do" // ok

******************************************
*	   		  LLM outcomes 				 *
******************************************

* Cleaning scripts for labor market outcomes, executed in this order.
* The last three scripts carry no "ok" mark in the original file.

* Schooling outcomes
run "${do_clean_lmarket}/clean schooling.do" // ok

* Schooling outcomes for 1970, from the metro-Census
run "${do_clean_lmarket}/clean schooling - 1970.do" // ok

* Replaceability by robots, by education level
run "${do_clean_lmarket}/clean replaceability - education.do" // ok

* Wages and wage premia, by education level
run "${do_clean_lmarket}/clean wages.do" // ok

* Schooling and employment outcomes
run "${do_clean_lmarket}/clean schooling - employment.do" // ok

* Census population used as denominator for IPEDS schooling outcomes
run "${do_clean_lmarket}/clean population - IPEDS denominator.do" // ok

* Migration flows
run "${do_clean_lmarket}/clean migration.do" // ok

* Population, by education level
run "${do_clean_lmarket}/clean education.do" // ok

* Population, state level
run "${do_clean_lmarket}/clean population - state.do" // ok

* Schooling by demographics, 1970
run "${do_clean_lmarket}/clean schooling - demographics 1970.do"

* Schooling by demographics
run "${do_clean_lmarket}/clean schooling - demographics.do"

* Wages and wage premia, high vs low educated
run "${do_clean_lmarket}/clean wages - high vs low.do"


******************************************
*	      		 Education 		   	     *
******************************************

* Cleaning scripts for the education (IPEDS) data, executed in this order.

* Institutions in the IPEDS data
run "${do_clean_education}/clean institution - details.do" // ok

* Response status of institutions
run "${do_clean_education}/clean institutions - response.do" // ok

* Institution locations, ZIP and FIPS level
run "${do_clean_education}/clean institutions location - zip fips.do" // ok

* Institution locations, commuting zone level
run "${do_clean_education}/clean institutions location - czone.do" // ok

* Tuition fees
run "${do_clean_education}/clean institutions - tuition fees.do" // ok

* Financial aid (available from 2000)
run "${do_clean_education}/clean institutions - financial aid.do" // ok

* 12-month enrollment (available from 1993)
run "${do_clean_education}/clean enrollment - 12 months.do" // ok

* Fall enrollment
run "${do_clean_education}/clean enrollment - fall.do" // ok

* Completions, by major field of study
run "${do_clean_education}/clean completions.do" // ok

* Graduation rates
run "${do_clean_education}/clean graduation.do" // ok

* Reshape enrollment totals for variable construction
run "${do_clean_education}/clean enrollment - reshape.do" // ok

* Reshape completions by field of study for variable construction
run "${do_clean_education}/clean completions - reshape.do" // ok

* Reshape graduations for variable construction
run "${do_clean_education}/clean graduation - reshape.do" // ok

* Institution controls. This script sits in the covariates cleaners folder, but the
* original author's note says it must be run at this position -- do not move it.
run "${do_clean_covariates}/clean institution - controls.do" // ok

* Institution descriptive statistics
run "${do_clean_education}/clean institution - descriptives.do" // ok

* Enrollment, state level
run "${do_clean_education}/clean enrollment - state.do" // ok


******************************************
*	   		 	Covariates 				 *
******************************************

* Cleaning scripts for covariates, executed in this order.

* Demographic covariates
run "${do_clean_covariates}/clean demographics.do" // ok

* Industry-share covariates
run "${do_clean_covariates}/clean industry shares.do" // ok


************************************************************************************************************
*													Building											   *
************************************************************************************************************

******************************************
*	   Automation and technologies 		 *
******************************************

* Build scripts for automation and technology measures, executed in this order.

* Robot exposure and other technology shocks
run "${do_build_automation}/build exposure.do" // ok

* Robot exposure by CZ of origin and CZ of destination
run "${do_build_automation}/build exposure - origin & destination.do" // ok

* Robot exposure, state level
run "${do_build_automation}/build exposure - state.do" // ok

******************************************
*	   		  LLM outcomes 				 *
******************************************

* Build scripts for labor market outcomes, executed in this order.
* The last script carries no "ok" mark in the original file.

* College enrollment rates
run "${do_build_lmarket}/build schooling.do" // ok

* College enrollment rates, long differences
run "${do_build_lmarket}/build schooling - long differences.do" // ok

* Population groups by education level
run "${do_build_lmarket}/build education.do" // ok

* Population groups by education level, long differences
run "${do_build_lmarket}/build education - long differences.do" // ok

* College enrollment rates and employment of students
run "${do_build_lmarket}/build schooling - employment.do" // ok

* Wages and wage premium, by education level
run "${do_build_lmarket}/build wages - premium.do" // ok

* Student migration flows
run "${do_build_lmarket}/build migration.do" // ok

* Student migration flows across states
run "${do_build_lmarket}/build migration - state.do" // ok

* CZ-specific migration flows
run "${do_build_lmarket}/build migration - CZ-to-CZ.do" // ok

* Migration flows, long differences
run "${do_build_lmarket}/build migration - long differences.do" // ok

* Replaceability by robots, long differences
run "${do_build_lmarket}/build replaceability - long differences.do" // ok

* College enrollment rates, by demographic group
run "${do_build_lmarket}/build schooling - demographics.do"


******************************************
*	      		 Education 		   	     *
******************************************

* Build scripts for education outcomes, executed in this order.

* Enrollment outcomes, by institutional characteristics
run "${do_build_education}/build enrollment.do" // ok

* Graduation rates
run "${do_build_education}/build graduation.do" // ok

* Completions, by field of study
run "${do_build_education}/build completion.do" // ok

******************************************
*	   		 	Covariates 				 *
******************************************

* Build scripts for covariates, executed in this order.

* Covariates by period
run "${do_build_covariates}/build covariates.do" // ok

* Covariates for CZ of destination and CZ of origin
run "${do_build_covariates}/build CZ-to-CZ covariates.do" // ok

************************************************************************************************************
*													Analysis											   *
************************************************************************************************************

******************************************
*	   		 Main figures 				 *
******************************************

* Figure 1 -- Robot adoption at the CZ level, 1993-2007
run "${do_build_main}/Figure 1 - map robot exposure.do"

* Figure 2a -- Robots and schooling by age
run "${do_build_main}/Figure 2a - enrollment age.do"

* Figure 2b -- Robots and schooling by demographics
run "${do_build_main}/Figure 2b - enrollment demographics.do"


* Figure 3 -- Robots and field of study
run "${do_build_main}/Figure 3 - field of study.do"

* Figure 5 -- College education choice
* No script: this figure was generated in Microsoft PowerPoint.
* NOTE(review): no entry for a Figure 4 appears in this list -- confirm whether one is missing.

******************************************
*	   		 Main tables 				 *
******************************************

* Table 1 -- Descriptive statistics
run "${do_build_main}/Table 1 - main descriptives.do"

* Table 2 -- Robots and college enrollment
run "${do_build_main}/Table 2 - college enrollment.do"

* Table 3 -- Robots and college enrollment: institution characteristics
run "${do_build_main}/Table 3 - institution characteristics.do"

* Table 4 -- Robots and college enrollment by employment status
run "${do_build_main}/Table 4 - students employment.do"

* Table 5 -- Robots and student migration
run "${do_build_main}/Table 5 - migration.do"

* Table 6 -- Robots and graduation rates
run "${do_build_main}/Table 6 - graduation.do"

* Table 7 -- Robots and income
run "${do_build_main}/Table 7 - income.do"

******************************************
*	   		Appendix figures 			 *
******************************************

* Figure A1 -- Robots along the skill distribution
run "${do_build_appendix}/Figure A1 - skill distribution.do"

* Figure A2 -- Share of migrating population, 1990
run "${do_build_appendix}/Figure A2 - map migrants.do"

* Figure A3 -- no script (original note: "pptx")

* Figure A4 -- Robot exposure by industry exclusion
run "${do_build_appendix}/Figure A4 - industry exclusion.do"

* Figure A5 -- NOTE(review): the original caption repeated Figure A4's; judging by the
* file name this is state-level exposure by demographics -- confirm against the paper.
run "${do_build_appendix}/Figure A5 - state exp demographics.do"


******************************************
*	   		Appendix tables 			 *
******************************************

* Table A1 -- Aggregation of CIP codes
* No script: this table was written by hand in LaTeX.

* Table A2 -- Descriptive statistics: industrial robots
run "${do_build_appendix}/Table A2 - robots descriptives.do"

* Table A3 -- Robots and college enrollment until 2014
run "${do_build_appendix}/Table A3 - extended period.do"

* Table A4 -- Robots, imports and college enrollment
run "${do_build_appendix}/Table A4 - robots and imports.do"

* Table A5 -- Robots and incoming students: in-state and out-of-state students
run "${do_build_appendix}/Table A5 - migration states.do"

* Table A6 -- Robots and employment by education level
run "${do_build_appendix}/Table A6 - employment.do"

* Table A7 -- Robots and college enrollment: product market competition from Europe
run "${do_build_appendix}/Table A7 - product market competition.do"

* Table A8 -- Robots and college enrollment: pre-trends
run "${do_build_appendix}/Table A8 - pre-trends.do"

* Table A9 -- Robots and college enrollment: placebo test
run "${do_build_appendix}/Table A9 - placebo and initial shares.do"

* Table A10 -- Robots and students' CZ-specific migration flows
run "${do_build_appendix}/Table A10 - CZ-to-CZ flows.do"

* Table A11 -- Robots and student migration using a 5-year normalization
run "${do_build_appendix}/Table A11 - migration 5 years.do"

* Table A12 -- Robots and college enrollment: alternative construction of robot exposure measures
run "${do_build_appendix}/Table A12 - alternative instruments.do"

* Table A13 -- Robots and college enrollment: exclusion of CZs with highest robot exposure
run "${do_build_appendix}/Table A13 - exclusion.do"

* Table A14 -- Robots and college enrollment: unweighted results
run "${do_build_appendix}/Table A14 - unweighted.do"

* Table A15 -- Robots and college enrollment at state level
run "${do_build_appendix}/Table A15 - state level.do"


